import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# from https://blog.csdn.net/qq_42689278/article/details/84590798

# Start a Chrome session and open the Toutiao home page.
browser = webdriver.Chrome()
browser.get("http://www.toutiao.com")

# One implicit-wait setting is enough: it applies to the whole session, so
# every later element lookup polls for up to 1 second before giving up.
browser.implicitly_wait(1)
browser.maximize_window()

# Navigate to the "娱乐" channel and then its "电影" sub-channel by link text.
# find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text, which
# was removed in Selenium 4; this form works on Selenium 3 as well.
link1 = browser.find_element(By.LINK_TEXT, "娱乐")
link1.click()

link2 = browser.find_element(By.LINK_TEXT, '电影')
link2.click()

# Parallel result lists filled by get_info(); index i in each list is meant
# to describe the same article.
title_list, url_list, imgurl_list, time_list = [], [], [], []


def get_info():
    """Append the articles currently rendered in the feed to the result lists.

    Each ``<li>`` of the feed is visited once and contributes exactly one
    entry to each of ``title_list``, ``url_list``, ``imgurl_list`` and
    ``time_list``, so the four lists stay index-aligned. An article with no
    image or no timestamp gets ``None`` in the corresponding list; a list
    item without a title link is skipped entirely.
    """
    item_xpath = "/html/body/div/div[4]/div[2]/div[2]/div/div/div/ul/li"
    for item in browser.find_elements(By.XPATH, item_xpath):
        # Title text and article URL come from the same anchor, so look it
        # up once instead of running the same query twice.
        anchors = item.find_elements(By.XPATH, "./div/div[1]/div/div[1]/a")
        if not anchors:
            continue
        anchor = anchors[0]

        # Lookups are relative to this item; querying the whole page per
        # field would silently shift later entries whenever one article
        # lacks an image or a timestamp.
        # NOTE: with an implicit wait configured on the session, each lookup
        # that finds nothing waits out that timeout before returning [].
        images = item.find_elements(By.XPATH, "./div/div[1]/a/img")
        stamps = item.find_elements(
            By.XPATH, "./div/div[1]/div/div[2]/div[1]/span"
        )

        title_list.append(anchor.text)
        url_list.append(anchor.get_attribute('href'))
        imgurl_list.append(images[0].get_attribute('src') if images else None)
        time_list.append(stamps[0].text if stamps else None)

def get_manyinfo(min_items=40, scroll_rounds=30, max_attempts=10):
    """Scroll the feed and harvest articles until enough have been collected.

    Args:
        min_items: Stop once ``title_list`` holds at least this many entries.
        scroll_rounds: Number of scroll-to-bottom steps (one second apart)
            performed before each harvest, to let the feed lazy-load.
        max_attempts: Upper bound on scroll-and-harvest passes, so a page
            whose layout no longer matches cannot keep the loop running
            forever.

    The browser session is always shut down before returning, including
    when scraping raises. If the target is not reached within
    ``max_attempts`` passes a ``RuntimeWarning`` is issued and whatever was
    collected is left in the result lists.
    """
    import warnings

    try:
        # Nudge the page down first so the feed starts loading.
        browser.execute_script("window.scrollTo(0,1000);")
        time.sleep(1)

        for attempt in range(max_attempts):
            if len(title_list) >= min_items:
                break
            if attempt:
                # Reload only before a retry; refreshing after the final
                # harvest would just cost an extra page load.
                browser.refresh()
            for _ in range(scroll_rounds):
                browser.execute_script(
                    "window.scrollTo(0,document.body.scrollHeight);"
                )
                time.sleep(1)
            get_info()

        if len(title_list) < min_items:
            warnings.warn(
                "collected %d of %d requested items after %d attempts"
                % (len(title_list), min_items, max_attempts),
                RuntimeWarning,
            )
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver process behind.
        browser.quit()

def save_info():
    """Print every collected article and return them as a list of records.

    The four module-level result lists are zipped together (so the output is
    as long as the shortest list). Each article is printed as a
    ``(title, url, time, image_url)`` tuple and also turned into a dict with
    the keys ``'标题'``, ``'url'``, ``'时间'`` and ``'图片url'``.

    Returns:
        list[dict]: One record per article, in collection order. Nothing is
        persisted here; the caller can hand the records to whatever storage
        it uses.
    """
    records = []
    for title, url, posted, img_url in zip(
        title_list, url_list, time_list, imgurl_list
    ):
        print((title, url, posted, img_url))
        # Previously this dict was built and thrown away on every
        # iteration; keep it so the structured record is actually usable.
        records.append({
            '标题': title,
            'url': url,
            '时间': posted,
            '图片url': img_url,
        })

    print('数据写入成功')
    return records

def main():
    """Run the pipeline: harvest articles from the feed, then print them."""
    # The stages run strictly in this order; the second reads what the
    # first has accumulated in the module-level lists.
    for stage in (get_manyinfo, save_info):
        stage()

# Script entry point: run the scrape only when this file is executed directly.
# Note that the browser session created at module top level starts on import
# regardless of this guard.
if __name__=="__main__":
    main()